require("dplyr")

## Set working directory to Dataverse folder

# Read the raw survey file; the path is relative to the working directory.
# NOTE(review): read.table() is used with its default separator (any run of
# whitespace) -- confirm this parses the .tab file as intended.
survey <- read.table("Harris_Data/Harris 1997 National Issues Survey, study no. 618199/harris_s618199_spss.tab", header = TRUE)

### fix everything below for this survey

# pid: sequential row identifier, 1..nrow(survey).
# seq_len() is used rather than 1:nrow(survey), which would produce c(1, 0)
# for a zero-row data frame.
survey$pid <- seq_len(nrow(survey))

# study: study number, stored as a character constant
survey$study <- "618199"

# study year (year)
survey$year <- 1997

# geographic data (urban): set to NA, no source column is mapped here
survey$urban <- NA

# geographic data (region): set to NA, no source column is mapped here
survey$region <- NA

# respondent head of household (hh): set to NA, no source column is mapped here
survey$hh <- NA

# increasing inequality (inequality)
# Response-code -> label lookup for item E1_2.
inequality_labels <- c(
  `1` = "Feel",
  `2` = "Don't Feel",
  `11` = "Not Sure",
  `12` = "Refused"
)
# Raw code frequencies, printed before recoding as a sanity check.
table(survey$E1_2)
# Codes are converted to character before recoding; the lookup is spliced
# into recode() with !!! so each name/value pair becomes one replacement.
survey$inequality <- dplyr::recode(
  as.character(survey$E1_2),
  !!!inequality_labels
)
# Label frequencies after recoding.
table(survey$inequality)

# inequality variable (inequality.variable)
# NOTE(review): constant 1 for every row; its meaning is not documented in
# this script -- confirm against the project codebook.
survey$inequality.variable <- 1

# union (union.self, union.other): set to NA, no source column is mapped here
survey$union.self <- NA
survey$union.other <- NA

# employment (employed): set to NA; only employed.self is derived below
survey$employed <- NA

# empl self (employed.self): labelled version of item G3
# Raw code frequencies, printed before recoding as a sanity check.
table(survey$G3)
survey$employed.self <- dplyr::recode(as.character(survey$G3),
                          `1` = "Employed full-time",
                          `2` = "Employed part-time",
                          `3` = "Unemployed, but looking for work",
                          `4` = "Not employed and not looking for work",
                          `11` = "Don't know",
                          `12` = "Refused")
# Check the column that was just recoded. The original tabulated
# survey$employed, which is all NA, so the check showed nothing useful.
table(survey$employed.self)

# occupation: set to NA, no source column is mapped here
survey$occupation <- NA

# occ self: set to NA, no source column is mapped here
survey$occupation.self <- NA

# household size (hhsize)
## this only includes adults over 18
table(survey$F6)
# Kept as the raw F6 code, converted to character (no labels applied).
survey$hhsize <- as.character(survey$F6)

# education (educ)
# Response-code -> label lookup for item F2.
educ_labels <- c(
  `1` = "Less than high school",
  `2` = "High school graduate",
  `3` = "Some college",
  `4` = "College graduate",
  `5` = "Post graduate",
  `11` = "Not sure",
  `12` = "Refused"
)
table(survey$F2)
survey$educ <- dplyr::recode(survey$F2, !!!educ_labels)
table(survey$educ)

# household income (income)
# Response-code -> label lookup for item F3.
income_labels <- c(
  `1` = "Under $7500",
  `2` = "$7,501 to $15,000",
  `3` = "$15,001 to $25,000",
  `4` = "$25,001 to $35,000",
  `5` = "$35,001 to $50,000",
  `6` = "$50,001 to $75,000",
  `7` = "$75,001 to $100,000",
  `8` = "$100,001 or over",
  `11` = "Not sure",
  `12` = "Refused"
)
table(survey$F3)
survey$income <- dplyr::recode(survey$F3, !!!income_labels)
table(survey$income)

# age
table(survey$F1)
# Kept as the raw F1 value, converted to character.
survey$age <- as.character(survey$F1)

# race
# Response-code -> label lookup for item F5.
race_labels <- c(
  `1` = "White",
  `2` = "Black",
  `3` = "African-American",
  `4` = "Asian or Pacific Islander",
  `5` = "American Indian or Alaskan native",
  `6` = "Some other race",
  `11` = "Not sure",
  `12` = "Refused"
)
# Response-code -> label lookup for item F4; codes 11 and 12 share one label.
hispanic_labels <- c(
  `11` = "Decline/not sure",
  `12` = "Decline/not sure",
  `1` = "Yes, hispanic",
  `2` = "No, not hispanic"
)

table(survey$F5)
survey$race1 <- dplyr::recode(survey$F5, !!!race_labels)
survey$race2 <- dplyr::recode(survey$F4, !!!hispanic_labels)
table(survey$race1)
table(survey$race2)

# Logical masks used to build the combined race variable. Comparisons with
# NA stay NA, so rows with a missing race1/race2 propagate NA into the result.
race_declined <- survey$race1 == "Not sure" | survey$race1 == "Refused"
is_white <- survey$race1 == "White"
hispanic_declined <- survey$race2 == "Decline/not sure"
not_hispanic <- survey$race2 == "No, not hispanic"

# Category for white respondents, split by the F4 answer.
white_category <- ifelse(
  hispanic_declined,
  "Decline/not sure",
  ifelse(not_hispanic, "Non-Hispanic White", "Hispanic White")
)

# Combined variable: declined race -> "Decline/not sure"; white -> category
# above; every other race1 label -> "Non-white".
survey$race <- ifelse(
  race_declined,
  "Decline/not sure",
  ifelse(is_white, white_category, "Non-white")
)
table(survey$race)
# Breakdown of the combined variable among respondents coded "White".
table(survey$race[survey$race1 == "White"])

# politics (party): set to NA, no source column is mapped here
survey$party <- NA

# politics (ideology): set to NA, no source column is mapped here
survey$ideology <- NA

# gender
# Response-code -> label lookup for the SEX column.
gender_labels <- c(`1` = "Male", `2` = "Female")
table(survey$SEX)
survey$gender <- dplyr::recode(survey$SEX, !!!gender_labels)
table(survey$gender)

# religion: set to NA, no source column is mapped here
survey$religion <- NA

# factuals: three placeholder columns, all NA
survey[c("factual1", "factual2", "factual3")] <- NA

## alienation index
# Items E1_1, E1_3 and E1_4 share one response-code -> label lookup.
# NOTE(review): E1_2 (inequality) is wrapped in as.character() before
# recoding, these three are not. If the columns are read as numeric, recode()
# turns any code without a label into NA instead of keeping it -- confirm
# the difference is intended.
feel_labels <- c(
  `1` = "Feel",
  `2` = "Don't Feel",
  `11` = "Not Sure",
  `12` = "Refused"
)
survey$dontcare <- dplyr::recode(survey$E1_1, !!!feel_labels)
survey$dontcount <- dplyr::recode(survey$E1_3, !!!feel_labels)
survey$leftout <- dplyr::recode(survey$E1_4, !!!feel_labels)

## question place
# Constant marker for this study.
survey$question_place <- "no party question"

# subset
# Columns carried over into the per-study data frame, in output order.
keep_cols <- c(
  "pid", "study", "year", "urban", "region", "hh",
  "inequality", "inequality.variable",
  "union.self", "union.other",
  "employed", "employed.self",
  "occupation", "occupation.self",
  "hhsize", "educ", "income", "age", "race",
  "party", "ideology", "gender", "religion",
  "factual1", "factual2", "factual3",
  "dontcare", "dontcount", "leftout",
  "question_place"
)
survey_618199 <- survey[, keep_cols]

# save file
#saveRDS(survey_618199, file = "Harris_Data/survey_618199.rds")
